author: “Leonard Baum” date: “2024-16-01” output: html_document: code_folding: hide df_print: paged highlight: tango number_sections: no theme: cosmo toc: no word_document: toc: no pdf_document: toc: no —

Creating the DATA SET

In a first step the financial data from Chinese firms was downloaded from Bloomberg and uploaded in R. It includes all firms with the country of domicile being China that were part of the following GICS subindustries: IT Consulting & Other Services (GICS 45102010), Data Processing & Outsourced Services (GICS 45102020), Internet Services & Infrastructure (GICS 45102030), Application Software (GICS 45103010), Systems Software (GICS 45103020), Communications Equipment (GICS 45201020), Technology Hardware, Storage & Peripherals (GICS 45202030), Electronic Equipment & Instruments (GICS 45203010), Electronic Components (GICS 45203015), Electronic Manufacturing Services (GICS 45203020), Technology Distributors (GICS 45203030), Internet & Direct Marketing Retail (GICS 25502020), Interactive Media & Services (GICS 50203010) and Movies & Entertainment (GICS 50202010).

The following information for 16 calendar quarters (Q1 2019 - Q4 2022) was collected: (1) GICS codes at the subindustry level, (2) average market cap, (3) revenue, (4) profits as measured by earnings before interest and taxes (EBIT), (5) currency and (6) the financial market ticker as a unique identifier.

The data sets were uploaded piece by piece due to size limits and then joined by a unique identifier.

### importing datasets using relative paths



# Firm-level sheet with reporting currency and GICS classification

GICS_Cur_Exc <- read_excel(path = "Data/GICS_Currency_Exchange_onlyfirms.xlsx")


# Market capitalisation, delivered in two files (tags "qrt16,9" and "qrt8,1");
# the object names suggest 2019-20 and 2021-22 respectively

MC1920 <- read_excel(
  path = "Data/Calender_Quarter/MarketCAP_qrt16,9_01012023_onlyfirms.xlsx"
)


MC2122 <- read_excel(
  path = "Data/Calender_Quarter/MarketCAP_qrt8,1_01012023_onlyfirms.xlsx"
)


# Revenue, same two-file split

Rev1920 <- read_excel(
  path = "Data/Calender_Quarter/Revenue_qrt16,9_01012023_onlyfirms.xlsx"
)

Rev2122 <- read_excel(
  path = "Data/Calender_Quarter/Revenue_qrt8,1_01012023_onlyfirms.xlsx"
)

# Earnings files (named "IBIT" on disk; presumably these hold the columns that
# are later matched with the "EBIT:" prefix -- TODO confirm)

IBIT1920 <- read_excel(
  path = "Data/Calender_Quarter/IBIT_qrt16,9_01012023_onlyfirms.xlsx"
)

IBIT2122 <- read_excel(
  path = "Data/Calender_Quarter/IBIT_qrt8,1_01012023_onlyfirms.xlsx"
)

  
  ### Merge the sheets step by step; every full join keeps all tickers that
  ### appear on either side and matches rows on "Ticker"

df2 <- GICS_Cur_Exc %>% full_join(MC1920, by = "Ticker")
df3 <- df2 %>% full_join(MC2122, by = "Ticker")
df4 <- df3 %>% full_join(Rev1920, by = "Ticker")
df5 <- df4 %>% full_join(Rev2122, by = "Ticker")
df6 <- df5 %>% full_join(IBIT1920, by = "Ticker")
dffull <- df6 %>% full_join(IBIT2122, by = "Ticker")

Converting data to USD

In a second step, the spot exchange rates were added and all values were converted to USD. To achieve this, firms without a value for currency were first removed from the data set (2 rows were excluded that lacked not only the currency but almost all necessary data, including names: 688496 CH Equity and 301379). Attached below is an overview of the missing financial information in the data set and the variable names used in the analysis.

# Remove every row that has no reporting currency

dffull <- drop_na(dffull, Curncy)

# Wide -> long -> one row per firm and quarter:
# 1. stack all "Market Cap:", "Revenue:" and "EBIT:" columns, splitting each
#    column name at ":" into the measure ("Variable") and the "Quarter"
# 2. spread the measures back out into one column per measure
df_long <- pivot_longer(
  dffull,
  cols = c(
    starts_with("Market Cap:"),
    starts_with("Revenue:"),
    starts_with("EBIT:")
  ),
  names_to = c("Variable", "Quarter"),
  names_sep = ":"
)
df_long <- pivot_wider(
  df_long,
  names_from = "Variable",
  values_from = "value"
)


##### Attach the official exchange rates, matched on the quarter

exch_rate <- read_excel(
  path = "Data/Calender_Quarter/Exchange_rates_formated.xlsx"
)

# The join keeps every firm-quarter row of df_long; afterwards "Market Cap" is
# renamed to Market_Cap so it can be used without back-ticks
dflong_1 <- df_long %>%
  left_join(exch_rate, by = "Quarter") %>%
  dplyr::rename(Market_Cap = "Market Cap")



### Converting Market_Cap, Revenue and EBIT to USD
#
# Every row carries the firm's reporting currency (Curncy) and one exchange-rate
# column per currency (Exch_CNY, Exch_HKD, Exch_TWD, Exch_SGD, Exch_AUD;
# presumably supplied by the exchange-rate sheet joined on Quarter).
# The conversion factor is looked up once per row and then applied to all three
# measures, rather than repeating the same nested if_else() chain three times.
#
# * USD values are kept as they are (factor 1).
# * Rows with a missing currency, or a currency other than the six listed
#   codes, get an NA factor and therefore NA in all three Adj_* columns.

df_adj <- dflong_1 %>%
  mutate(
    usd_factor = case_when(
      Curncy == "USD" ~ 1,
      Curncy == "CNY" ~ Exch_CNY,
      Curncy == "HKD" ~ Exch_HKD,
      Curncy == "TWD" ~ Exch_TWD,
      Curncy == "SGD" ~ Exch_SGD,
      Curncy == "AUD" ~ Exch_AUD,
      TRUE ~ NA_real_
    ),
    Adj_Market_Cap = Market_Cap * usd_factor,
    Adj_Revenue = Revenue * usd_factor,
    Adj_EBIT = EBIT * usd_factor
  ) %>%
  # Drop the helper column again so that the three Adj_* columns remain the
  # last three columns of the result, in the same order as before
  select(-usd_factor)

# Re-transform the relevant data back to a wide format

# Keep the firm identifiers, the quarter and the three USD-converted measures.
# The columns are selected by name instead of by position so that the step does
# not silently pick the wrong columns when an upstream step adds, drops or
# reorders a column.
df_sel <- df_adj %>%
  select(
    Ticker, Name, `GICS SubInd`, Quarter,
    Adj_Market_Cap, Adj_Revenue, Adj_EBIT
  )

# One row per firm; one column per measure and quarter, named
# "<measure>_<Quarter>"
df_wide <- df_sel %>%
  pivot_wider(
    names_from = Quarter,
    values_from = c(Adj_Market_Cap, Adj_Revenue, Adj_EBIT)
  )

# Renaming the variables in preparation for the data analysis
#
# Columns whose name contains one of the long prefixes below get the short
# prefix plus a re-numbered quarter. The number found in the old name (all of
# its digits) is mirrored around 16: abs(n - 16) + 1, i.e. 16 -> 1, 15 -> 2,
# ..., 1 -> 16. Presumably the source files count quarters backwards from the
# most recent one, so that Q1 becomes the earliest quarter -- TODO confirm.

# Names as they are before any renaming; all matching is done against these
var_names <- names(df_wide)

short_prefix <- c(
  Adj_Market_Cap_Q = "MC_Q",
  Adj_Revenue_Q = "Rev_Q",
  Adj_EBIT_Q = "EBIT_Q"
)

for (long_prefix in names(short_prefix)) {
  is_match <- grepl(long_prefix, var_names)
  if (any(is_match)) {
    source_number <- as.numeric(gsub("[^0-9]", "", var_names[is_match]))
    names(df_wide)[is_match] <- paste0(
      short_prefix[[long_prefix]],
      abs(source_number - 16) + 1
    )
  }
}

# Rename the sub-industry column so it can be used without back-ticks
df_wide <- df_wide %>% dplyr::rename(GICS_SubInd = "GICS SubInd")

# Store the GICS sub-industry code as character rather than as a number
df_wide$GICS_SubInd <- as.character(df_wide$GICS_SubInd)

# Check the new variable names
names(df_wide)
 [1] "Ticker"      "Name"        "GICS_SubInd" "MC_Q1"       "MC_Q2"      
 [6] "MC_Q3"       "MC_Q4"       "MC_Q5"       "MC_Q6"       "MC_Q7"      
[11] "MC_Q8"       "MC_Q9"       "MC_Q10"      "MC_Q11"      "MC_Q12"     
[16] "MC_Q13"      "MC_Q14"      "MC_Q15"      "MC_Q16"      "Rev_Q1"     
[21] "Rev_Q2"      "Rev_Q3"      "Rev_Q4"      "Rev_Q5"      "Rev_Q6"     
[26] "Rev_Q7"      "Rev_Q8"      "Rev_Q9"      "Rev_Q10"     "Rev_Q11"    
[31] "Rev_Q12"     "Rev_Q13"     "Rev_Q14"     "Rev_Q15"     "Rev_Q16"    
[36] "EBIT_Q1"     "EBIT_Q2"     "EBIT_Q3"     "EBIT_Q4"     "EBIT_Q5"    
[41] "EBIT_Q6"     "EBIT_Q7"     "EBIT_Q8"     "EBIT_Q9"     "EBIT_Q10"   
[46] "EBIT_Q11"    "EBIT_Q12"    "EBIT_Q13"    "EBIT_Q14"    "EBIT_Q15"   
[51] "EBIT_Q16"   
# Number of missing values in each column of df_wide
na_counts <- vapply(df_wide, function(column) sum(is.na(column)), numeric(1))
na_counts
     Ticker        Name GICS_SubInd       MC_Q1       MC_Q2       MC_Q3 
          0           0           0         445         434         423 
      MC_Q4       MC_Q5       MC_Q6       MC_Q7       MC_Q8       MC_Q9 
        396         365         347         321         264         239 
     MC_Q10      MC_Q11      MC_Q12      MC_Q13      MC_Q14      MC_Q15 
        206         177         158         131         105          79 
     MC_Q16      Rev_Q1      Rev_Q2      Rev_Q3      Rev_Q4      Rev_Q5 
         32         448         442         394         388         320 
     Rev_Q6      Rev_Q7      Rev_Q8      Rev_Q9     Rev_Q10     Rev_Q11 
        318         259         259         235         224         176 
    Rev_Q12     Rev_Q13     Rev_Q14     Rev_Q15     Rev_Q16     EBIT_Q1 
        168         165         168         130         119         454 
    EBIT_Q2     EBIT_Q3     EBIT_Q4     EBIT_Q5     EBIT_Q6     EBIT_Q7 
        445         402         394         327         319         265 
    EBIT_Q8     EBIT_Q9    EBIT_Q10    EBIT_Q11    EBIT_Q12    EBIT_Q13 
        264         243         233         181         175         172 
   EBIT_Q14    EBIT_Q15    EBIT_Q16 
        170         138         128 

Calculating Concentration Measures

In the next step, the four concentration measures - HHI Market Cap, CR4 Market Cap, HHI Revenue and CR4 Revenue - were calculated for the 16 GICS subindustries, with the number of firms ranging from 8 to 177 per market. The graphs below provide a graphical illustration of the development of the different market concentration measures – HHI Revenue, CR4 Revenue, HHI Market Cap, CR4 Market Cap – over the 16 quarters. The dotted line perpendicular to quarter 8 represents the cutoff point that delineates the time before and after the new regulatory approach took effect. For the two revenue-based concentration measures it is difficult to detect any pattern around the cutoff. This is different for the concentration measures based on market capitalization. The CR4 MC graph shows a general decrease in market concentration during the treatment period, with the notable exception of the most concentrated markets. Meanwhile, the more comprehensive HHI MC measure shows a sharp decline shortly after the cutoff for the two most concentrated markets (GICS 50203010: Interactive Media & Services; GICS 25502020: Internet & Direct Marketing Retail), while no substantial changes can be observed for the large number of markets with low concentration.

### calculating HHI

# n_distinct(df_wide$GICS_SubInd)
### Number of firms in each GICS sub-industry (one table cell per code)
table(df_wide[["GICS_SubInd"]])

25502020 45102010 45102020 45102030 45103010 45103020 45201020 45202030 
      35       86        8       22      129       39      114       48 
45203010 45203015 45203020 45203030 45301010 45301020 50202010 50203010 
     177      175       16       15       47      144       47       40 
#with the amount of firms ranging from 8 to 177 per sub-industry.

# Concentration measures per sub-industry and quarter
#
# For every quarter q (1..16) and every GICS sub-industry, four values are
# computed and written onto every firm row of that sub-industry:
#   HHIRev_SubInd_Q<q>  HHI of revenue     (from Rev_Q<q>)
#   CR4Rev_Subind_Q<q>  CR4 of revenue     (from Rev_Q<q>)
#   HHIMC_SubInd_Q<q>   HHI of market cap  (from MC_Q<q>)
#   CR4MC_Subind_Q<q>   CR4 of market cap  (from MC_Q<q>)
# The column names, including the lower-case "Subind" of the CR4 columns, are
# what the later plotting and regression steps look for, so they are unchanged.
#
# The firm values are pulled out as plain numeric vectors with `[[`, so no
# arithmetic, sorting or summing is done on one-column data frames.
#
# NOTE(review): a sub-industry without any usable value in a quarter (all NA,
# or a total of zero) now gets NA instead of 0. A concentration of 0 cannot
# occur in a market with at least one firm and would enter the regressions as
# a real observation.

# Shares of the market total held by each firm; firms with a missing value are
# left out. Returns an empty vector when no share can be computed.
market_shares <- function(values) {
  values <- values[!is.na(values)]
  total <- sum(values)
  if (length(values) == 0 || total == 0) {
    return(numeric(0))
  }
  values / total
}

# Herfindahl-Hirschman index: sum of the squared market shares.
hhi_index <- function(values) {
  shares <- market_shares(values)
  if (length(shares) == 0) {
    return(NA_real_)
  }
  sum(shares^2)
}

# Concentration ratio: combined share of the `top_n` largest firms
# (all firms if the market has fewer than `top_n`).
cr4_ratio <- function(values, top_n = 4) {
  shares <- market_shares(values)
  if (length(shares) == 0) {
    return(NA_real_)
  }
  sum(head(sort(shares, decreasing = TRUE), top_n))
}

# Unique GICS sub-industries present in the data
subindustries <- unique(df_wide$GICS_SubInd)

n_quarters <- 16

# Source column prefix, target column prefix and measure, in the order in which
# the result columns are appended to df_wide
concentration_measures <- list(
  list(source = "Rev_Q", target = "HHIRev_SubInd_Q", measure = hhi_index),
  list(source = "Rev_Q", target = "CR4Rev_Subind_Q", measure = cr4_ratio),
  list(source = "MC_Q", target = "HHIMC_SubInd_Q", measure = hhi_index),
  list(source = "MC_Q", target = "CR4MC_Subind_Q", measure = cr4_ratio)
)

for (spec in concentration_measures) {
  for (q in seq_len(n_quarters)) {
    source_col <- paste0(spec$source, q)
    target_col <- paste0(spec$target, q)

    for (subindustry in subindustries) {
      # rows of the firms that belong to the current sub-industry
      in_market <- df_wide$GICS_SubInd == subindustry

      # the same value is stored on every firm row of the sub-industry
      df_wide[in_market, target_col] <-
        spec$measure(df_wide[[source_col]][in_market])
    }
  }
}

# Data frames for the concentration measures, starting with HHI (revenue)

# Step 1: HHI revenue per quarter and sub-industry

# Sub-industry code plus all HHI (revenue) columns. The measure is repeated on
# every firm row, so only the first row of each sub-industry is kept.
df_HHI <- df_wide %>%
  select(GICS_SubInd, starts_with("HHIRev_SubInd_Q")) %>%
  distinct(GICS_SubInd, .keep_all = TRUE)

# Long format: one row per sub-industry and quarter column
df_HHI_Rev <- melt(
  df_HHI,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHI"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_HHI_Rev$Quarter <- as.numeric(
  gsub("HHIRev_SubInd_Q", "", df_HHI_Rev$Quarter)
)

# One line per sub-industry; the dotted vertical line marks quarter 8
ggplot(
  df_HHI_Rev,
  aes(x = Quarter, y = HHI, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "HHI Revenue",
    x = "Quarter",
    y = "HHI",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

# Author's reading of the plot: several GICS sub-industries show small
# drop-offs at Q8, the intended cut-off point

### Step 2: CR4 Revenue

# Prefix of the CR4 (revenue) columns exactly as they are named in df_wide.
# It is used both for selecting the columns and for stripping the prefix, so
# the two steps cannot drift apart (the selection previously used a different
# capitalisation and only matched because starts_with() ignores case).
cr4_rev_prefix <- "CR4Rev_Subind_Q"

# Sub-industry code plus all CR4 (revenue) columns
df_CR4 <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_rev_prefix))

# The measure is repeated on every firm row; keep the first row of each
# sub-industry only
df_CR4 <- df_CR4[!duplicated(df_CR4[["GICS_SubInd"]]), ]

# Long format: one row per sub-industry and quarter column
df_CR4 <- melt(
  df_CR4,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "CR4"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_CR4$Quarter <- as.numeric(
  sub(cr4_rev_prefix, "", df_CR4$Quarter, fixed = TRUE)
)

# One line per sub-industry; the dotted vertical line marks quarter 8
ggplot(
  df_CR4,
  aes(x = Quarter, y = CR4, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  labs(x = "Quarter", y = "CR4", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Revenue") +
  theme_minimal()

### Step 3: HHI Market Cap
# Data frame with the HHI (market cap) value per quarter and sub-industry

# Sub-industry code plus all HHI (market cap) columns. The measure is repeated
# on every firm row, so only the first row of each sub-industry is kept.
df_HHIMC <- df_wide %>%
  select(GICS_SubInd, starts_with("HHIMC_SubInd_Q")) %>%
  distinct(GICS_SubInd, .keep_all = TRUE)

# Long format: one row per sub-industry and quarter column
df_HHIMC <- melt(
  df_HHIMC,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "HHIMC"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_HHIMC$Quarter <- as.numeric(sub("HHIMC_SubInd_Q", "", df_HHIMC$Quarter))

# One line per sub-industry; the dotted vertical line marks quarter 8
ggplot(
  df_HHIMC,
  aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "HHI Market Cap",
    x = "Quarter",
    y = "HHIMC",
    color = "GICS Subindustry"
  ) +
  theme_minimal()

## Author's reading of the plot: sharp drop-offs after the change in regulatory
## approach for the 2 most concentrated markets

# Step 4: CR4 Market Cap
# Data frame with the CR4 (market cap) value per quarter and sub-industry

# Prefix of the CR4 (market cap) columns exactly as they are named in df_wide.
# It is used both for selecting the columns and for stripping the prefix, so
# the two steps cannot drift apart (the selection previously used a different
# capitalisation and only matched because starts_with() ignores case).
cr4_mc_prefix <- "CR4MC_Subind_Q"

# Sub-industry code plus all CR4 (market cap) columns
df_CR4MC <- df_wide %>%
  select(GICS_SubInd, starts_with(cr4_mc_prefix))

# The measure is repeated on every firm row; keep the first row of each
# sub-industry only
df_CR4MC <- df_CR4MC[!duplicated(df_CR4MC[["GICS_SubInd"]]), ]

# Long format: one row per sub-industry and quarter column
df_CR4MC <- melt(
  df_CR4MC,
  id.vars = "GICS_SubInd",
  variable.name = "Quarter",
  value.name = "CR4MC"
)

# Strip the column prefix so that Quarter becomes the numeric quarter index
df_CR4MC$Quarter <- as.numeric(
  sub(cr4_mc_prefix, "", df_CR4MC$Quarter, fixed = TRUE)
)

# One line per sub-industry; the dotted vertical line marks quarter 8
ggplot(
  df_CR4MC,
  aes(x = Quarter, y = CR4MC, group = GICS_SubInd, color = GICS_SubInd)
) +
  geom_line() +
  labs(x = "Quarter", y = "CR4MC", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Market Cap") +
  theme_minimal()

Testing Hypothesis 1: Change in regulatory approach has led to reduced market concentration.

In a preliminary step, the treatment variable is defined as a dummy variable based on the cutoff (i.e., start of new regulatory approach) being Quarter 8.

### Testing Hypothesis 1: Change in regulatory approach has led to reduced market concentration.

# First step: a 0/1 treatment dummy in each of the four long data frames.
# Quarters 9 and later are coded 1 (treated), quarters up to 8 are coded 0.

# HHI revenue
df_HHI_Rev$treatment <- as.numeric(df_HHI_Rev$Quarter >= 9)

# CR4 revenue
df_CR4$treatment <- as.numeric(df_CR4$Quarter >= 9)

# HHI market cap
df_HHIMC$treatment <- as.numeric(df_HHIMC$Quarter >= 9)

# CR4 market cap
df_CR4MC$treatment <- as.numeric(df_CR4MC$Quarter >= 9)

In the initial phase of the analysis, we calculate the overall treatment effect of the new regulatory regime on China’s digital economy. This is accomplished through a two-way fixed effects regression model using panel data that remains agnostic about the relationship between time and market concentration. What we can see here is a highly significant negative treatment effect for the two market capitalisation models (HHI MC -3.2% and CR4 MC -2.4%) and a very small negative treatment effect for the HHI Rev model (-0.8 percent). The R-squared values of the MC models are relatively low at 0.087 (HHI MC) and 0.056 (CR4 MC), but the MC models perform better than their revenue counterparts.

# Fixed-effects ("within") panel regressions of each concentration measure on
# the treatment dummy, all 16 sub-industries.
#
# Panel structure: unit = GICS_SubInd, time = Quarter.
# The sub-industry is no longer listed as a regressor: it is the unit index, so
# the within transformation already absorbs it and plm reported no coefficient
# for it anyway. `effect = "individual"` (plm's default) is spelled out.
# NOTE(review): this is a unit fixed-effects model, not a two-way one as the
# accompanying text says. `treatment` depends on Quarter only, so quarter fixed
# effects would absorb it completely.

# HHI market cap
panelHHIMC <- pdata.frame(df_HHIMC, index = c("GICS_SubInd", "Quarter"))
pn1 <- plm(HHIMC ~ treatment, data = panelHHIMC,
           model = "within", effect = "individual")
# reported: negative treatment effect, significant at the 1% level

# CR4 market cap
panelCR4MC <- pdata.frame(df_CR4MC, index = c("GICS_SubInd", "Quarter"))
pn2 <- plm(CR4MC ~ treatment, data = panelCR4MC,
           model = "within", effect = "individual")
# reported: negative treatment effect, significant at the 1% level

# HHI revenue
panelHHIREV <- pdata.frame(df_HHI_Rev, index = c("GICS_SubInd", "Quarter"))
pn3 <- plm(HHI ~ treatment, data = panelHHIREV,
           model = "within", effect = "individual")
# reported: small negative treatment effect, significant at the 5% level

# CR4 revenue
panelCR4REV <- pdata.frame(df_CR4, index = c("GICS_SubInd", "Quarter"))
pn4 <- plm(CR4 ~ treatment, data = panelCR4REV,
           model = "within", effect = "individual")
# reported: no significant treatment effect

stargazer(pn1, pn2, pn3, pn4, title="Fixed Effects Panel Regression all markets", type = "text" )

Fixed Effects Panel Regression all markets
==============================================================
                                  Dependent variable:         
                          ------------------------------------
                            HHIMC     CR4MC     HHI      CR4  
                             (1)       (2)      (3)      (4)  
--------------------------------------------------------------
treatment                 -0.032*** -0.024*** -0.009** -0.006 
                           (0.007)   (0.006)  (0.004)  (0.004)
                                                              
--------------------------------------------------------------
Observations                 256       256      256      256  
R2                          0.087     0.056    0.019    0.008 
Adjusted R2                 0.026    -0.007    -0.046  -0.058 
F Statistic (df = 1; 239) 22.675*** 14.146*** 4.745**   2.018 
==============================================================
Note:                              *p<0.1; **p<0.05; ***p<0.01

As an additional test, a unit fixed effects (for GICS subindustry) linear regression model is employed. The key difference here is that we assume a linear time trend and include Quarter as a control variable. Under this assumption, the treatment effect is not significant in any model apart from the CR4 market cap model (very small positive treatment effect of 0.038440 with a p-value of 0.00158). Ultimately, the two-way fixed effects model is more robust because it makes no assumption of linearity.

# Unit fixed-effect models for all GICS sub-industries: ordinary least squares
# with one dummy per sub-industry, a linear Quarter trend and the treatment
# dummy

# HHI market cap (df_HHIMC)
reg1 <- lm(
  formula = HHIMC ~ GICS_SubInd + Quarter + treatment,
  data = df_HHIMC
)
# reported: treatment not statistically significant

# CR4 market cap (df_CR4MC)
reg2 <- lm(
  formula = CR4MC ~ GICS_SubInd + Quarter + treatment,
  data = df_CR4MC
)
# reported: significant treatment effect

# HHI revenue (df_HHI_Rev)
reg3 <- lm(
  formula = HHI ~ GICS_SubInd + Quarter + treatment,
  data = df_HHI_Rev
)
# reported: treatment not significant

# CR4 revenue (df_CR4)
reg4 <- lm(
  formula = CR4 ~ GICS_SubInd + Quarter + treatment,
  data = df_CR4
)
# reported: treatment not significant

stargazer(
  reg1, reg2, reg3, reg4,
  title = " Unit-fixed effect model Treatment effect",
  type = "text"
)

Unit-fixed effect model Treatment effect
==========================================================================
                                           Dependent variable:            
                               -------------------------------------------
                                 HHIMC      CR4MC       HHI        CR4    
                                  (1)        (2)        (3)        (4)    
--------------------------------------------------------------------------
GICS_SubInd45102010            -0.580***  -0.738***  -0.215***  -0.607*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45102020            -0.230***    -0.018    0.054***   0.104*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45102030            -0.481***  -0.362***   0.317***    0.013   
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45103010            -0.566***  -0.638***  -0.220***  -0.637*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45103020            -0.489***  -0.392***  -0.103***  -0.273*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45201020            -0.566***  -0.650***  -0.148***  -0.343*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45202030            -0.381***  -0.291***    0.010    -0.118*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203010            -0.514***  -0.546***  -0.177***  -0.385*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203015            -0.570***  -0.643***  -0.185***  -0.484*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203020            -0.308***  -0.081***   0.320***   0.061*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45203030            -0.281***  -0.141***  -0.038***    -0.001  
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45301010            -0.524***  -0.488***  -0.118***  -0.298*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd45301020            -0.516***  -0.502***  -0.199***  -0.516*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd50202010            -0.504***  -0.434***  -0.138***  -0.324*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
GICS_SubInd50203010             0.282***    0.025     0.500***   0.091*** 
                                (0.019)    (0.017)    (0.011)    (0.012)  
                                                                          
Quarter                        -0.005***  -0.008***    -0.001     0.001   
                                (0.001)    (0.001)    (0.001)    (0.001)  
                                                                          
treatment                        0.008     0.038***    0.0004     -0.013  
                                (0.013)    (0.012)    (0.008)    (0.008)  
                                                                          
Constant                        0.646***   1.010***   0.261***   0.888*** 
                                (0.015)    (0.014)    (0.009)    (0.010)  
                                                                          
--------------------------------------------------------------------------
Observations                      256        256        256        256    
R2                               0.955      0.967      0.978      0.983   
Adjusted R2                      0.952      0.965      0.977      0.982   
Residual Std. Error (df = 238)   0.052      0.048      0.033      0.033   
F Statistic (df = 17; 238)     296.239*** 409.123*** 636.382*** 823.882***
==========================================================================
Note:                                          *p<0.1; **p<0.05; ***p<0.01

Zooming in on the BAT Markets:

In a second, subsequent analysis, a subset of China’s digital economy built from the primary markets of the BATs was examined. Baidu and Tencent both belong to the Interactive Media & Services subindustry (GICS 50203010), while Alibaba belongs to the Internet & Direct Marketing Retail subindustry (GICS 25502020). As you can see from the plots, only the HHI MC model shows a sharp decline after the cutoff.

### Restrict each long data frame to the two primary GICS sub-industries of
### the BATs (50203010 and 25502020)

dfBAT1 <- df_HHIMC[df_HHIMC$GICS_SubInd %in% c("50203010", "25502020"), ]
dfBAT2 <- df_CR4MC[df_CR4MC$GICS_SubInd %in% c("50203010", "25502020"), ]
dfBAT3 <- df_HHI_Rev[df_HHI_Rev$GICS_SubInd %in% c("50203010", "25502020"), ]
dfBAT4 <- df_CR4[df_CR4$GICS_SubInd %in% c("50203010", "25502020"), ]

# Line plots of the four concentration measures for the two BAT markets.
# Each plot draws one line per sub-industry; the dotted vertical line marks
# quarter 8.

# HHI market cap
ggplot(dfBAT1, aes(x = Quarter, y = HHIMC, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "HHIMC", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("HHI MC BAT Markets ") +
  theme_minimal()

# CR4 market cap (y-axis label corrected from "CR4IMC" to "CR4MC")
ggplot(dfBAT2, aes(x = Quarter, y = CR4MC, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "CR4MC", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 MC BAT Markets ") +
  theme_minimal()

# HHI revenue
ggplot(dfBAT3, aes(x = Quarter, y = HHI, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "HHI Rev", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("HHI Revenue BAT Markets ") +
  theme_minimal()

# CR4 revenue
ggplot(dfBAT4, aes(x = Quarter, y = CR4, group = GICS_SubInd, color = GICS_SubInd)) +
  geom_line() +
  labs(x = "Quarter", y = "CR4 Rev", color = "GICS Subindustry") +
  geom_vline(xintercept = 8, linetype = "dotted") +
  ggtitle("CR4 Revenue BAT Markets ") +
  theme_minimal()

Once again a panel regression analysis is employed to test the treatment effect in the BAT markets. For the HHI MC model we find a negative treatment effect of -5.5 percent that is significant at the 95 percent level, and a stronger explanatory power in comparison to the panel regression model that took all markets into account.

# Fixed-effects ("within") panel regressions for the two BAT markets only.
#
# Panel structure: unit = GICS_SubInd, time = Quarter.
# The sub-industry is no longer listed as a regressor: it is the unit index, so
# the within transformation already absorbs it and plm reported no coefficient
# for it anyway. `effect = "individual"` (plm's default) is spelled out.

# HHI market cap
panelBATHHIMC <- pdata.frame(dfBAT1, index = c("GICS_SubInd", "Quarter"))

pn5 <- plm(HHIMC ~ treatment, data = panelBATHHIMC,
           model = "within", effect = "individual")
# reported: negative treatment effect (-0.055), significant at the 5% level and
# larger in size than the effect found for all markets

# CR4 market cap
panelBATCR4MC <- pdata.frame(dfBAT2, index = c("GICS_SubInd", "Quarter"))

pn6 <- plm(CR4MC ~ treatment, data = panelBATCR4MC,
           model = "within", effect = "individual")
# reported: not significant

# HHI revenue
panelBATHHIREV <- pdata.frame(dfBAT3, index = c("GICS_SubInd", "Quarter"))

pn7 <- plm(HHI ~ treatment, data = panelBATHHIREV,
           model = "within", effect = "individual")
# reported: negative treatment effect, significant at the 10% level only

# CR4 revenue
panelBATCR4REV <- pdata.frame(dfBAT4, index = c("GICS_SubInd", "Quarter"))

pn8 <- plm(CR4 ~ treatment, data = panelBATCR4REV,
           model = "within", effect = "individual")
# reported: very small positive treatment effect, significant at the 5% level

stargazer(pn5, pn6, pn7, pn8, title=" Fixed Effects Panel Regression BAT markets",type = "text")

Fixed Effects Panel Regression BAT markets
=========================================================
                               Dependent variable:       
                         --------------------------------
                          HHIMC    CR4MC    HHI     CR4  
                           (1)      (2)     (3)     (4)  
---------------------------------------------------------
treatment                -0.055**  0.002  -0.033* 0.007**
                         (0.025)  (0.003) (0.017) (0.003)
                                                         
---------------------------------------------------------
Observations                32      32      32      32   
R2                        0.144    0.017   0.111   0.168 
Adjusted R2               0.085   -0.051   0.050   0.110 
F Statistic (df = 1; 29) 4.870**   0.501  3.639*  5.848**
=========================================================
Note:                         *p<0.1; **p<0.05; ***p<0.01

In this chunk we check whether the general treatment effect established before can be attributed to the reduction of market concentration in BAT markets or stands on its own. To test this, we add an interaction effect between BAT markets and the treatment effect to our regression models. For our main model of interest, HHI MC, the interaction term is not significant (only the HHI Rev model has a negative significant interaction term), but including it in our model slightly reduces the general negative treatment effect from -3.2 percent to -2.9 percent. This indicates that the general treatment effect remains significant (albeit a bit smaller) even when taking into account the stronger treatment effect in BAT markets.

### Interaction between BAT markets and the treatment dummy, to test whether
### the general treatment effect is driven by the BAT markets alone

# BAT is 1 for the sub-industries "50203010" and "25502020" and 0 otherwise.
#
# The sub-industry is no longer listed as a regressor: it is the unit index of
# the panel, so the within transformation already absorbs it.
# `treatment * BAT` expands to treatment + BAT + treatment:BAT. BAT does not
# vary within a sub-industry, so only `treatment` and `treatment:BAT` receive
# coefficients in the reported table.
# `effect = "individual"` (plm's default) is spelled out.

# HHI market cap
panelHHIMC <- panelHHIMC %>%
  mutate(BAT = ifelse(GICS_SubInd %in% c("50203010", "25502020"), 1, 0))

pn9 <- plm(HHIMC ~ treatment * BAT, data = panelHHIMC,
           model = "within", effect = "individual")
# reported: interaction not significant; the treatment coefficient moves from
# -0.032 (model without interaction) to -0.029

# CR4 market cap
panelCR4MC <- panelCR4MC %>%
  mutate(BAT = ifelse(GICS_SubInd %in% c("50203010", "25502020"), 1, 0))

pn10 <- plm(CR4MC ~ treatment * BAT, data = panelCR4MC,
            model = "within", effect = "individual")
# reported: interaction not significant; treatment effect stays significant

# HHI revenue
panelHHIREV <- panelHHIREV %>%
  mutate(BAT = ifelse(GICS_SubInd %in% c("50203010", "25502020"), 1, 0))

pn11 <- plm(HHI ~ treatment * BAT, data = panelHHIREV,
            model = "within", effect = "individual")
# reported: no significant general treatment effect, but a significant negative
# interaction, i.e. a stronger reduction in the BAT markets

# CR4 revenue
panelCR4REV <- panelCR4REV %>%
  mutate(BAT = ifelse(GICS_SubInd %in% c("50203010", "25502020"), 1, 0))

pn12 <- plm(CR4 ~ treatment * BAT, data = panelCR4REV,
            model = "within", effect = "individual")
# reported: interaction not significant; treatment significant at the 10%
# level only

stargazer(pn9, pn10, pn11, pn12, title=" Interaction BATs Treatment Effect",type = "text")

Interaction BATs Treatment Effect
==============================================================
                                  Dependent variable:         
                          ------------------------------------
                            HHIMC     CR4MC     HHI      CR4  
                             (1)       (2)      (3)      (4)  
--------------------------------------------------------------
treatment                 -0.029*** -0.028***  -0.005  -0.008*
                           (0.007)   (0.007)  (0.004)  (0.004)
                                                              
treatment:BAT              -0.026     0.030   -0.027**  0.015 
                           (0.020)   (0.019)  (0.012)  (0.013)
                                                              
--------------------------------------------------------------
Observations                 256       256      256      256  
R2                          0.093     0.065    0.039    0.014 
Adjusted R2                 0.028    -0.002    -0.029  -0.057 
F Statistic (df = 2; 238) 12.229*** 8.303***  4.883***  1.669 
==============================================================
Note:                              *p<0.1; **p<0.05; ***p<0.01

Here we show the performance of the panel regression for our main model of interest HHI MC. It shows the treatment effect in BAT markets and all markets in China’s digital economy.

# --- HHI MC, BAT markets only ---

# Store the predictions of the BAT panel model (pn5) next to the observed data
panelBATHHIMC$predicted <- predict(pn5)

# Actual (black) vs. predicted (red) HHIMC per subindustry across quarters
bat_fit_plot <- ggplot(
  data = panelBATHHIMC,
  mapping = aes(x = Quarter, y = HHIMC, group = GICS_SubInd)
) +
  geom_line(mapping = aes(color = "Actual")) +
  geom_line(mapping = aes(y = predicted, color = "Predicted")) +
  labs(x = "Quarter", y = "HHIMC", title = "Panel Regression BAT Performance") +
  scale_color_manual(values = c("Actual" = "black", "Predicted" = "red"))
bat_fit_plot

# --- HHI MC, all markets ---

# Store the predictions of the general panel model (pn1) next to the observed data
panelHHIMC$predicted <- predict(pn1)

# Actual (black) vs. predicted (red) HHIMC per subindustry across quarters
all_fit_plot <- ggplot(
  data = panelHHIMC,
  mapping = aes(x = Quarter, y = HHIMC, group = GICS_SubInd)
) +
  geom_line(mapping = aes(color = "Actual")) +
  geom_line(mapping = aes(y = predicted, color = "Predicted")) +
  labs(x = "Quarter", y = "HHIMC", title = "Panel Regression All markets Performance") +
  scale_color_manual(values = c("Actual" = "black", "Predicted" = "red"))
all_fit_plot

Again we also test for the treatment effect in BAT markets via a unit fixed effects (for GICS Subindustry) linear regression model that includes Quarter as a control variable. Here the HHI MC portrays a more substantial treatment effect of -14.3 percent, significant at the 99% level (p < 0.01). Also, with an R-squared of 0.845 the model performs much better.

# Unit fixed effects (subindustry dummies) with a linear time trend, fitted on
# the BAT-market data sets; one model per concentration measure.
reg5 <- lm(formula = HHIMC ~ GICS_SubInd + Quarter + treatment, data = dfBAT1) # HHI, market cap
reg6 <- lm(formula = CR4MC ~ GICS_SubInd + Quarter + treatment, data = dfBAT2) # CR4, market cap
reg7 <- lm(formula = HHI ~ GICS_SubInd + Quarter + treatment, data = dfBAT3)   # HHI, revenue
reg8 <- lm(formula = CR4 ~ GICS_SubInd + Quarter + treatment, data = dfBAT4)   # CR4, revenue

stargazer(
  reg5, reg6, reg7, reg8,
  title = "Unit fixed effects regression model BAT markets",
  type = "text"
)

Unit fixed effects regression model BAT markets
=======================================================================
                                         Dependent variable:           
                              -----------------------------------------
                                HHIMC     CR4MC      HHI        CR4    
                                 (1)       (2)       (3)        (4)    
-----------------------------------------------------------------------
GICS_SubInd50203010           0.282***  0.025***   0.500***   0.091*** 
                               (0.024)   (0.003)   (0.017)    (0.003)  
                                                                       
Quarter                        0.011**    0.001    -0.006*     0.001   
                               (0.005)   (0.001)   (0.004)    (0.001)  
                                                                       
treatment                     -0.143***  -0.004     0.018      0.002   
                               (0.047)   (0.006)   (0.033)    (0.006)  
                                                                       
Constant                      0.586***  0.958***   0.296***   0.883*** 
                               (0.031)   (0.004)   (0.022)    (0.004)  
                                                                       
-----------------------------------------------------------------------
Observations                     32        32         32         32    
R2                              0.845     0.757     0.971      0.974   
Adjusted R2                     0.829     0.731     0.967      0.972   
Residual Std. Error (df = 28)   0.067     0.008     0.047      0.008   
F Statistic (df = 3; 28)      50.973*** 29.059*** 307.130*** 355.499***
=======================================================================
Note:                                       *p<0.1; **p<0.05; ***p<0.01

Here we test for the interaction effect in the unit fixed linear regression model controlled for time.

# Testing for the BAT x treatment interaction in the unit fixed effects linear
# regression models (subindustry dummies plus a linear Quarter trend).
#
# BAT is a 0/1 dummy for the two BAT subindustries. The GICS_SubInd dummies
# already identify those subindustries, so the BAT main effect is collinear
# (it shows up without an estimate in the regression table); the coefficient
# of interest is treatment:BAT.

# GICS codes of the two BAT subindustries
bat_codes <- c("50203010", "25502020")

# HHI market cap, using df_HHIMC
df_HHIMC <- df_HHIMC %>%
  mutate(BAT = ifelse(GICS_SubInd %in% bat_codes, 1, 0))

int1 <- lm(HHIMC ~ GICS_SubInd + Quarter + treatment * BAT, data = df_HHIMC)

# CR4 market cap, using df_CR4MC
df_CR4MC <- df_CR4MC %>%
  mutate(BAT = ifelse(GICS_SubInd %in% bat_codes, 1, 0))

int2 <- lm(CR4MC ~ GICS_SubInd + Quarter + treatment * BAT, data = df_CR4MC)

# HHI revenue, using df_HHI_Rev
df_HHI_Rev <- df_HHI_Rev %>%
  mutate(BAT = ifelse(GICS_SubInd %in% bat_codes, 1, 0))

int3 <- lm(HHI ~ GICS_SubInd + Quarter + treatment * BAT, data = df_HHI_Rev)

# CR4 revenue, using df_CR4
df_CR4 <- df_CR4 %>%
  mutate(BAT = ifelse(GICS_SubInd %in% bat_codes, 1, 0))

# Fixed: the interaction term was missing here, so this model did not test the
# BAT interaction at all (unlike int1-int3). Re-knit to refresh the table.
int4 <- lm(CR4 ~ GICS_SubInd + Quarter + treatment * BAT, data = df_CR4)

stargazer(int1, int2, int3, int4, title=" Unit-fixed effect model Interaction effect",type = "text")

Unit-fixed effect model Interaction effect
===========================================================================================================================
                                                              Dependent variable:                                          
                    -------------------------------------------------------------------------------------------------------
                              HHIMC                     CR4MC                      HHI                       CR4           
                               (1)                       (2)                       (3)                       (4)           
---------------------------------------------------------------------------------------------------------------------------
GICS_SubInd45102010         -0.594***                 -0.723***                 -0.229***                 -0.607***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45102020         -0.243***                  -0.003                   0.041***                  0.104***         
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45102030         -0.495***                 -0.347***                 0.303***                    0.013          
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45103010         -0.579***                 -0.623***                 -0.234***                 -0.637***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45103020         -0.502***                 -0.378***                 -0.117***                 -0.273***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45201020         -0.579***                 -0.635***                 -0.162***                 -0.343***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45202030         -0.395***                 -0.276***                  -0.003                   -0.118***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45203010         -0.527***                 -0.531***                 -0.190***                 -0.385***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45203015         -0.583***                 -0.628***                 -0.198***                 -0.484***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45203020         -0.321***                 -0.066***                 0.306***                  0.061***         
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45203030         -0.294***                 -0.126***                 -0.052***                  -0.001          
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45301010         -0.537***                 -0.473***                 -0.132***                 -0.298***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd45301020         -0.530***                 -0.487***                 -0.213***                 -0.516***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd50202010         -0.517***                 -0.420***                 -0.152***                 -0.324***        
                             (0.021)                   (0.019)                   (0.013)                   (0.012)         
                                                                                                                           
GICS_SubInd50203010         0.282***                    0.025                   0.500***                  0.091***         
                             (0.018)                   (0.017)                   (0.011)                   (0.012)         
                                                                                                                           
Quarter                     -0.005***                 -0.008***                  -0.001                     0.001          
                             (0.001)                   (0.001)                   (0.001)                   (0.001)         
                                                                                                                           
treatment                     0.011                   0.035***                    0.004                    -0.013          
                             (0.013)                   (0.012)                   (0.008)                   (0.008)         
                                                                                                                           
BAT                                                                                                                        
                                                                                                                           
                                                                                                                           
treatment:BAT                -0.026                    0.030*                   -0.027**                                   
                             (0.020)                   (0.018)                   (0.012)                                   
                                                                                                                           
Constant                    0.658***                  0.997***                  0.273***                  0.888***         
                             (0.017)                   (0.016)                   (0.011)                   (0.010)         
                                                                                                                           
---------------------------------------------------------------------------------------------------------------------------
Observations                   256                       256                       256                       256           
R2                            0.955                     0.967                     0.979                     0.983          
Adjusted R2                   0.952                     0.965                     0.977                     0.982          
Residual Std. Error     0.052 (df = 237)          0.048 (df = 237)          0.032 (df = 237)          0.033 (df = 238)     
F Statistic         280.815*** (df = 18; 237) 389.356*** (df = 18; 237) 611.300*** (df = 18; 237) 823.882*** (df = 17; 238)
===========================================================================================================================
Note:                                                                                           *p<0.1; **p<0.05; ***p<0.01

Robustness Checks Linear Regression Model

In this chunk we do the robustness checks for the most relevant HHI MC model for the unit-fixed linear regression. The first plot indicates that the regression model is linear. As an additional test, we played around with the Quarter variable, checking if the treatment effect remains significant with Quarter being a higher order variable (it is). When testing with the studentized Breusch-Pagan test for heteroscedasticity we did not have sufficient evidence to reject the null hypothesis of homoscedasticity. However, the p-value is relatively close to 0.05, indicating that there may be a possibility of heteroscedasticity. The subsequent plot indicates that the data may be a bit heteroscedastic in the higher range of the predictor variable(s). However, as an additional robustness check we rerun the regression while logging the dependent variable, and the treatment effect remains significant. Further, the result of another studentized Breusch-Pagan test for the logged dependent variable suggests that there is no significant evidence of heteroscedasticity in the logged model. Lastly, plots show that the residuals as well as the error terms are normally distributed. A gap in the middle of the last plots indicates that there is a significant difference in the dependent variable between the treated and untreated groups, and this effect of the regulatory approach is not captured by the other variables in the model. In conclusion, the robustness checks confirm the validity of the treatment effect. While the model may not be perfectly linear, it passes all tests and the treatment effect persists in the robust models. Lastly, we provide a visual representation of the performance of the unit-fixed HHI MC Model.

#### Linearity
# Residuals-vs-fitted plot of the BAT unit fixed effects model (reg5)
plot(reg5, 1)

# Additional robustness check: does the treatment effect hold when the time
# trend is quadratic instead of purely linear?
#
# Fixed: inside a model formula `^` is the formula crossing operator, so
# `Quarter^2` reduces to plain `Quarter` and the model was identical to reg5.
# I() makes R treat the square arithmetically. Re-knit to refresh the printed
# summary for this specification.
r1 <- lm(HHIMC ~ GICS_SubInd + Quarter + I(Quarter^2) + treatment, data = dfBAT1)
summary(r1)

Call:
lm(formula = HHIMC ~ GICS_SubInd + Quarter^2 + treatment, data = dfBAT1)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.12754 -0.03566  0.01853  0.03991  0.15055 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          0.58575    0.03084  18.991  < 2e-16 ***
GICS_SubInd50203010  0.28159    0.02356  11.954 1.63e-12 ***
Quarter              0.01098    0.00514   2.136  0.04159 *  
treatment           -0.14291    0.04739  -3.015  0.00541 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.06663 on 28 degrees of freedom
Multiple R-squared:  0.8452,    Adjusted R-squared:  0.8287 
F-statistic: 50.97 on 3 and 28 DF,  p-value: 1.813e-11
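# NOTE: the printed output above is identical to reg5 because, inside a formula, `Quarter^2` is read as a formula operator and reduces to `Quarter`; a genuine quadratic trend needs I(Quarter^2), and the treatment effect should be judged on that specification.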

#### Testing for homoscedasticity

# Studentized Breusch-Pagan test on the BAT unit fixed effects model; the
# surrounding parentheses store the result and print it in one step.
(bp_test <- bptest(reg5))

    studentized Breusch-Pagan test

data:  reg5
BP = 7.5308, df = 3, p-value = 0.05677
#The studentized Breusch-Pagan test tests for heteroscedasticity in the errors of a linear regression model. The null hypothesis is that the errors are homoscedastic, while the alternative hypothesis is that they are heteroscedastic.At the 0.05 significance level, we do not have sufficient evidence to reject the null hypothesis of homoscedasticity. However, the p-value is relatively close to 0.05, indicating that there may be some evidence of heteroscedasticity. 

# Scale-location plot of reg5
plot(reg5, which = 3)

# The slight diagonal drop at the higher fitted values is a bit worrying: it
# hints that the residual variance is not constant there, i.e. the data may be
# heteroscedastic in the upper range.

# As a check, refit the model with the dependent variable on the log scale
r2 <- lm(formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment, data = dfBAT1)
summary(r2)

Call:
lm(formula = log(HHIMC) ~ GICS_SubInd + Quarter + treatment, 
    data = dfBAT1)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.21058 -0.04584  0.01383  0.04897  0.18958 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         -0.560958   0.042618 -13.162 1.63e-13 ***
GICS_SubInd50203010  0.381510   0.032550  11.721 2.59e-12 ***
Quarter              0.018953   0.007103   2.668  0.01254 *  
treatment           -0.207275   0.065487  -3.165  0.00372 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09207 on 28 degrees of freedom
Multiple R-squared:  0.8404,    Adjusted R-squared:  0.8233 
F-statistic: 49.14 on 3 and 28 DF,  p-value: 2.786e-11
# The treatment effect in the logged model above is still significant.
# Breusch-Pagan test for the logged model (stored and printed in one step).
(bp_test_log <- bptest(r2))

    studentized Breusch-Pagan test

data:  r2
BP = 5.8729, df = 3, p-value = 0.118
# The result of the studentized Breusch-Pagan test for the logged dependent variable suggests that there is no significant evidence of heteroscedasticity in the model.
# Further, even in the logged model the treatment effect is still significant and substantial. Lastly, we don't expect the time trend to be perfectly linear; this is just an approximation.

#### Normality of residuals ####
# Normal Q-Q plot of reg5: residuals close to the diagonal line point to an
# approximately normal distribution.
plot(reg5, which = 2)

# Residuals against fitted values of reg5
plot(x = reg5$fitted.values, y = reg5$residuals)

# The points look fairly randomly scattered, i.e. no obvious pattern in the
# errors.
# NOTE(review): the gap in the middle of the fitted values may simply reflect
# the level difference between the two subindustries (GICS_SubInd coefficient
# of about 0.28) rather than the treatment - confirm before interpreting it.

# All in all the checks look robust.

# Visual representation of how the model performs:
# store the fitted values of reg5 next to the observed data
dfBAT1$predicted <- predict(reg5)

# Actual (black) vs. predicted (red) HHIMC per subindustry across quarters
reg5_fit_plot <- ggplot(
  data = dfBAT1,
  mapping = aes(x = Quarter, y = HHIMC, group = GICS_SubInd)
) +
  geom_line(mapping = aes(color = "Actual")) +
  geom_line(mapping = aes(y = predicted, color = "Predicted")) +
  labs(x = "Quarter", y = "HHIMC", title = "Regression Model Performance") +
  scale_color_manual(values = c("Actual" = "black", "Predicted" = "red"))
reg5_fit_plot

Testing Hypothesis 2: In the digital economy, market concentration does not serve as a good predictor of profits.

The following models explore if market concentration can function as a predictor of profits in the digital economy in China by looking both at total profits and profit margins. Looking at total firm profits first, all concentration measures – HHI Rev, CR4 Rev, HHI MC, CR4 MC – have a statistically significant relationship with EBIT. However, the concentration measures are bad predictors with R-squared values well below 1 percent, indicating that the market concentration measures included in the model do not have a strong relationship with firms’ profits and that other factors play a more important role.

### Testing hypothesis 2: Lower market concentration is correlated with a
### reduction in firms' profits

# Firm identifiers plus the quarterly EBIT and concentration-measure columns
# (selected by position in df_wide)
df_EBIT <- select(df_wide, 2, 3, 36:115)

# Reshapes one family of quarterly columns (all sharing `prefix`) into long
# format: one row per firm and quarter, the values stored in `value_name`
# and Quarter converted to the numeric quarter index.
pivot_quarterly <- function(data, prefix, value_name) {
  data %>%
    pivot_longer(
      cols = starts_with(prefix),
      names_to = "Quarter",
      values_to = value_name
    ) %>%
    select(Name, GICS_SubInd, Quarter, all_of(value_name)) %>%
    mutate(Quarter = as.numeric(sub(prefix, "", Quarter)))
}

# One long data set per variable
df_EBIT_long   <- pivot_quarterly(df_EBIT, "EBIT_Q", "EBIT")
df_HHIRev_long <- pivot_quarterly(df_EBIT, "HHIRev_SubInd_Q", "HHIRev")
df_CR4Rev_long <- pivot_quarterly(df_EBIT, "CR4Rev_Subind_Q", "CR4Rev")
df_HHIMC_long  <- pivot_quarterly(df_EBIT, "HHIMC_SubInd_Q", "HHIMC")
df_CR4MC_long  <- pivot_quarterly(df_EBIT, "CR4MC_Subind_Q", "CR4MC")

# Merge everything onto the EBIT observations (one row per firm and quarter)
firm_quarter_keys <- c("Name", "GICS_SubInd", "Quarter")

EBIT_merg <- df_EBIT_long %>%
  left_join(df_HHIRev_long, by = firm_quarter_keys) %>%
  left_join(df_CR4Rev_long, by = firm_quarter_keys) %>%
  left_join(df_HHIMC_long, by = firm_quarter_keys) %>%
  left_join(df_CR4MC_long, by = firm_quarter_keys)

# Panel structure: firm (Name) as unit index, Quarter as time index
EBIT_panel <- pdata.frame(EBIT_merg, index = c("Name", "Quarter"))

# Firm fixed effects ("within") regressions of total profits (EBIT) on each
# market concentration measure, one model per measure.

# HHI Rev
epan1 <- plm(EBIT ~ HHIRev, data = EBIT_panel, model = "within")

# CR4 Rev
# Fixed: this model previously regressed on HHIRev again (copy-paste), which
# made columns (1) and (2) of the table identical. Re-knit to refresh the table.
epan2 <- plm(EBIT ~ CR4Rev, data = EBIT_panel, model = "within")

# HHI MC
epan3 <- plm(EBIT ~ HHIMC, data = EBIT_panel, model = "within")

# CR4 MC
epan4 <- plm(EBIT ~ CR4MC, data = EBIT_panel, model = "within")

stargazer(epan1, epan2, epan3, epan4, title="Total Profits Fixed effects Regression Model",type = "text")

Total Profits Fixed effects Regression Model
===================================================================================================================
                                                              Dependent variable:                                  
                            ---------------------------------------------------------------------------------------
                                                                     EBIT                                          
                                     (1)                    (2)                    (3)                  (4)        
-------------------------------------------------------------------------------------------------------------------
HHIRev                      -13,296,484,105.000*** -13,296,484,105.000***                                          
                             (2,688,030,564.000)    (2,688,030,564.000)                                            
                                                                                                                   
HHIMC                                                                     -6,908,821,412.000***                    
                                                                           (1,817,286,387.000)                     
                                                                                                                   
CR4MC                                                                                             598,967,824.000  
                                                                                                (1,233,495,361.000)
                                                                                                                   
-------------------------------------------------------------------------------------------------------------------
Observations                        13,962                 13,962                13,962               13,962       
R2                                  0.002                  0.002                  0.001               0.00002      
Adjusted R2                         -0.077                 -0.077                -0.078               -0.079       
F Statistic (df = 1; 12940)       24.468***              24.468***              14.453***              0.236       
===================================================================================================================
Note:                                                                                   *p<0.1; **p<0.05; ***p<0.01
# all models with very low explanatory power

Next we look at the Operating Profit Margin (EBIT Margin), which is calculated on the basis of EBIT and Revenue. Observations with revenue = 0 or NA were excluded. Running the same fixed effects regression models on profit margins shows no statistically significant relationship with the concentration measures. The models have an even lower R-squared value. All in all, these non-results indicate that market concentration holds virtually no explanatory value as a predictor of profit margins.

# Calculating profit margins

## Revenue in long format: one row per firm and quarter.
# The prefix is written exactly as the columns are named ("Rev_Q"). The former
# "REV_Q" only matched through starts_with()'s case-insensitive default, while
# the prefix stripping below is case-sensitive.
df_REV_long <- df_wide %>%
  pivot_longer(
    cols = starts_with("Rev_Q"),
    names_to = "Quarter",
    values_to = "REV"
  ) %>%
  select(Name, GICS_SubInd, Quarter, REV) %>%
  # convert the quarter label to its numeric index
  mutate(Quarter = as.numeric(sub("Rev_Q", "", Quarter)))

firm_quarter_keys <- c("Name", "GICS_SubInd", "Quarter")

# EBIT + revenue, the profit margin in percent (2 decimals, NA when either
# input is missing), then the four concentration measures
df_Profitmarg <- df_EBIT_long %>%
  left_join(df_REV_long, by = firm_quarter_keys) %>%
  mutate(
    Profitmarg = ifelse(
      is.na(REV) | is.na(EBIT),
      NA,
      round((EBIT / REV) * 100, 2)
    )
  ) %>%
  left_join(df_HHIRev_long, by = firm_quarter_keys) %>%
  left_join(df_CR4Rev_long, by = firm_quarter_keys) %>%
  left_join(df_HHIMC_long, by = firm_quarter_keys) %>%
  left_join(df_CR4MC_long, by = firm_quarter_keys)

# Revenue of 0 turns the margin into +/- infinity; those observations have to
# be excluded before running a regression (NA margins are kept here).
df_Profitmarg <- df_Profitmarg[!is.infinite(df_Profitmarg$Profitmarg), ]

# Panel structure: firm (Name) as unit index, Quarter as time index
pmarg_panel <- pdata.frame(df_Profitmarg, index = c("Name", "Quarter"))

# Firm fixed effects ("within") regressions of the profit margin on each
# market concentration measure, one model per measure.
pmarg1 <- plm(formula = Profitmarg ~ HHIRev, data = pmarg_panel, model = "within") # HHI Rev
pmarg2 <- plm(formula = Profitmarg ~ CR4Rev, data = pmarg_panel, model = "within") # CR4 Rev
pmarg3 <- plm(formula = Profitmarg ~ HHIMC, data = pmarg_panel, model = "within")  # HHI MC
pmarg4 <- plm(formula = Profitmarg ~ CR4MC, data = pmarg_panel, model = "within")  # CR4 MC

stargazer(
  pmarg1, pmarg2, pmarg3, pmarg4,
  title = "Profit Margins Fixed effects Regression Model",
  type = "text"
)

Profit Margins Fixed effects Regression Model
===============================================================================
                                            Dependent variable:                
                            ---------------------------------------------------
                                                Profitmarg                     
                                (1)          (2)          (3)          (4)     
-------------------------------------------------------------------------------
HHIRev                       59,223.210                                        
                            (76,040.910)                                       
                                                                               
CR4Rev                                     939.516                             
                                         (48,147.440)                          
                                                                               
HHIMC                                                  15,490.580              
                                                      (51,204.880)             
                                                                               
CR4MC                                                                -54.573   
                                                                   (34,314.370)
                                                                               
-------------------------------------------------------------------------------
Observations                   13,855       13,855       13,855       13,855   
R2                            0.00005      0.00000      0.00001       0.000    
Adjusted R2                    -0.079       -0.079       -0.079       -0.079   
F Statistic (df = 1; 12837)    0.607        0.0004       0.092       0.00000   
===============================================================================
Note:                                               *p<0.1; **p<0.05; ***p<0.01

Testing Hypothesis 3

The effect of the new regulatory approach on growth rates was tested (1) on the aggregated market level and (2) on the firm level. When plotting the calculated growth rates per GICS subindustry we can observe a cyclical yet slightly negative trend for market capitalisation growth rates and a cyclical yet stable trajectory for revenue growth rates. In all cases, no abnormal patterns can be observed around the cut-off, with the notable exception of the 45102020 subindustry (Data Processing & Outsourced Services), which drops off dramatically in quarter 9 before quickly rebounding in the market capitalisation graph.

# Testing Hypothesis 3 ----

# Build a subindustry-level data set holding total market cap and total
# revenue for each of the 16 quarters.
quarter_value_cols <- c(paste0("MC_Q", 1:16), paste0("Rev_Q", 1:16))

# Formula of the form cbind(MC_Q1, ..., Rev_Q16) ~ GICS_SubInd.
# NOTE: the formula method of aggregate() uses na.action = na.omit by default,
# so a firm with an NA in any of these 32 columns is left out of every sum.
subind_sum_formula <- as.formula(
  paste0("cbind(", paste(quarter_value_cols, collapse = ", "), ") ~ GICS_SubInd")
)

# Sum market cap and revenue per subindustry, one column per quarter.
df_grow <- aggregate(subind_sum_formula, data = df_wide, FUN = sum)





# Starting with the revenue concentration measures.

# Subindustry-level revenue HHI and CR4 columns, one per quarter. Note the
# different capitalisation in df_wide: "SubInd" for HHI, "Subind" for CR4.
rev_conc_cols <- c(
  "GICS_SubInd",
  paste0("HHIRev_SubInd_Q", 1:16),
  paste0("CR4Rev_Subind_Q", 1:16)
)

# Attach the concentration measures to the aggregated totals. Every df_wide row
# of a subindustry is matched, so the result holds repeated subindustries.
df_grow_Rev <- merge(
  df_grow,
  df_wide[, rev_conc_cols],
  by = "GICS_SubInd",
  all.x = TRUE
)

# Dropping non-unique values: keep the first row of each subindustry.
df_grow_Rev <- df_grow_Rev[!duplicated(df_grow_Rev$GICS_SubInd), ]


# Transforming into long format: one row per subindustry and quarter.
#
# Each measure is reshaped on its own. pivot_longer() strips the column prefix
# (names_prefix) and converts the remaining quarter number to numeric
# (names_transform), so the same exactly-cased prefix is used for selecting the
# columns and for extracting the quarter. The CR4 columns are spelled "Subind"
# (lower-case i) in the data, whereas the HHI columns are spelled "SubInd".

# Aggregated revenue per subindustry and quarter
df_grow_1 <- df_grow_Rev %>%
  select(GICS_SubInd, starts_with("Rev_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "Rev_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "Rev"
  )

# Revenue-based HHI per subindustry and quarter
df_grow_2 <- df_grow_Rev %>%
  select(GICS_SubInd, starts_with("HHIRev_SubInd_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "HHIRev_SubInd_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "HHIRev"
  )

# Revenue-based CR4 per subindustry and quarter
df_grow_3 <- df_grow_Rev %>%
  select(GICS_SubInd, starts_with("CR4Rev_Subind_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "CR4Rev_Subind_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "CR4Rev"
  )

# Merging the three long revenue data sets on quarter and subindustry
rev_merge_keys <- c("Quarter", "GICS_SubInd")
dfgrow_Rev_long <- merge(df_grow_1, df_grow_2, by = rev_merge_keys, all = TRUE)
dfgrow_Rev_long <- merge(dfgrow_Rev_long, df_grow_3, by = rev_merge_keys, all = TRUE)

### Now we are doing the same for the market cap concentration measures.

# Subindustry-level market-cap HHI and CR4 columns, one per quarter. Note the
# different capitalisation in df_wide: "SubInd" for HHI, "Subind" for CR4.
mc_conc_cols <- c(
  "GICS_SubInd",
  paste0("HHIMC_SubInd_Q", 1:16),
  paste0("CR4MC_Subind_Q", 1:16)
)

# Attach the concentration measures to the aggregated totals. Every df_wide row
# of a subindustry is matched, so the result holds repeated subindustries.
df_grow_MC <- merge(
  df_grow,
  df_wide[, mc_conc_cols],
  by = "GICS_SubInd",
  all.x = TRUE
)

# Dropping non-unique values: keep the first row of each subindustry.
df_grow_MC <- df_grow_MC[!duplicated(df_grow_MC$GICS_SubInd), ]


# Transforming into long format: one row per subindustry and quarter.
#
# Each measure is reshaped on its own. pivot_longer() strips the column prefix
# (names_prefix) and converts the remaining quarter number to numeric
# (names_transform), so the same exactly-cased prefix is used for selecting the
# columns and for extracting the quarter. The CR4 columns are spelled "Subind"
# (lower-case i) in the data, whereas the HHI columns are spelled "SubInd".

# Aggregated market cap per subindustry and quarter
df_grow_4 <- df_grow_MC %>%
  select(GICS_SubInd, starts_with("MC_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "MC_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "MC"
  )

# Market-cap-based HHI per subindustry and quarter
df_grow_5 <- df_grow_MC %>%
  select(GICS_SubInd, starts_with("HHIMC_SubInd_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "HHIMC_SubInd_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "HHIMC"
  )

# Market-cap-based CR4 per subindustry and quarter
df_grow_6 <- df_grow_MC %>%
  select(GICS_SubInd, starts_with("CR4MC_Subind_Q")) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "CR4MC_Subind_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "CR4MC"
  )

# Merging the three long MC data sets on quarter and subindustry
mc_merge_keys <- c("Quarter", "GICS_SubInd")
dfgrow_MC_long <- merge(df_grow_4, df_grow_5, by = mc_merge_keys, all = TRUE)
dfgrow_MC_long <- merge(dfgrow_MC_long, df_grow_6, by = mc_merge_keys, all = TRUE)

##### Now we calculate the quarter-on-quarter growth rates per subindustry:
##### (value - previous value) / previous value.

##### dplyr's lag() is masked in this session, so it is called as dplyr::lag().
# The rows are sorted by subindustry and quarter before lagging so that "the
# previous row" is guaranteed to be the previous quarter, and the grouping is
# removed again afterwards. The first quarter of every subindustry gets NA.

df_growthrateMC <- df_grow_4 %>%
  arrange(GICS_SubInd, Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(Growth_MC = (MC - dplyr::lag(MC)) / dplyr::lag(MC)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_MC)

df_growthrateRev <- df_grow_1 %>%
  arrange(GICS_SubInd, Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(Growth_Rev = (Rev - dplyr::lag(Rev)) / dplyr::lag(Rev)) %>%
  ungroup() %>%
  select(GICS_SubInd, Quarter, Growth_Rev)

# Adding the MC growth rates to the long MC data set
dfgrow_MC_long <- merge(
  dfgrow_MC_long, df_growthrateMC,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)

# Adding the revenue growth rates to the long revenue data set
dfgrow_Rev_long <- merge(
  dfgrow_Rev_long, df_growthrateRev,
  by = c("Quarter", "GICS_SubInd"), all = TRUE
)


##### Plot the quarterly growth rates of market cap and revenue

# Layers shared by both charts: one line per subindustry and a dotted vertical
# line at quarter 8 marking the cut-off.
growth_layers <- list(
  geom_line(),
  geom_vline(xintercept = 8, linetype = "dotted"),
  theme_minimal()
)

plot1 <- ggplot(
  dfgrow_MC_long,
  aes(x = Quarter, y = Growth_MC, group = GICS_SubInd, colour = GICS_SubInd)
) +
  growth_layers +
  labs(
    title = "MC Growthrate ",
    x = "Quarter",
    y = "Growthrate Market Cap",
    colour = "GICS Subindustry"
  )

## Very confusing, one cannot really detect any patterns.
### Sharp decline in only one GICS subindustry:
### 45102020 --> Data Processing & Outsourced Services

plot2 <- ggplot(
  dfgrow_Rev_long,
  aes(x = Quarter, y = Growth_Rev, group = GICS_SubInd, colour = GICS_SubInd)
) +
  growth_layers +
  labs(
    title = "Rev Growthrate",
    x = "Quarter",
    y = "Growthrate Revenue",
    colour = "GICS Subindustry"
  )

# Show both charts side by side with equal widths.
grid.arrange(plot1, plot2, ncol = 2, nrow = 1, widths = c(2, 2))

First, we test for the general treatment effect of the new regulatory regimes on growth rates at the market level via a panel regression. While the revenue models yield non-results, we find a robust negative treatment effect in both MC models.

# Adding the treatment variable: 1 from quarter 9 onwards, 0 before.
dfgrow_Rev_long$treatment <- as.numeric(dfgrow_Rev_long$Quarter >= 9)
dfgrow_MC_long$treatment <- as.numeric(dfgrow_MC_long$Quarter >= 9)

# Conversion to panel data, indexed by subindustry and quarter.
market_panel_index <- c("GICS_SubInd", "Quarter")
pgrowthrateMC <- pdata.frame(dfgrow_MC_long, index = market_panel_index)
pgrowthrateRev <- pdata.frame(dfgrow_Rev_long, index = market_panel_index)

# Panel regressions ("within" estimator): growth rate on one concentration
# measure plus the treatment dummy.

# Market cap growth on the two MC concentration measures
grate1 <- plm(
  Growth_MC ~ HHIMC + treatment,
  data = pgrowthrateMC,
  model = "within"
)
grate2 <- plm(
  Growth_MC ~ CR4MC + treatment,
  data = pgrowthrateMC,
  model = "within"
)

# Revenue growth on the two revenue concentration measures
grate3 <- plm(
  Growth_Rev ~ HHIRev + treatment,
  data = pgrowthrateRev,
  model = "within"
)
grate4 <- plm(
  Growth_Rev ~ CR4Rev + treatment,
  data = pgrowthrateRev,
  model = "within"
)

stargazer(
  grate1, grate2, grate3, grate4,
  title = "Panel Regression Growthrates Market Level",
  type = "text"
)

Panel Regression Growthrates Market Level
=============================================================
                                  Dependent variable:        
                          -----------------------------------
                               Growth_MC        Growth_Rev   
                             (1)       (2)      (3)     (4)  
-------------------------------------------------------------
HHIMC                       0.149                            
                           (0.248)                           
                                                             
CR4MC                                0.617**                 
                                     (0.240)                 
                                                             
HHIRev                                        -0.126         
                                              (0.671)        
                                                             
CR4Rev                                                -1.005 
                                                      (0.646)
                                                             
treatment                 -0.144*** -0.135***  0.002  -0.003 
                           (0.025)   (0.024)  (0.040) (0.040)
                                                             
-------------------------------------------------------------
Observations                 240       240      240     240  
R2                          0.154     0.177   0.0002   0.011 
Adjusted R2                 0.089     0.114   -0.076  -0.065 
F Statistic (df = 2; 222) 20.208*** 23.881***  0.019   1.209 
=============================================================
Note:                             *p<0.1; **p<0.05; ***p<0.01

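Secondly, we employ panel regressions for firm-level growth rates. Here the two revenue models yield non-results, while the two MC models show a small but significant negative treatment effect; all 4 models have extremely low R-squared values.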

### We can also do that on firm level.

# In order to do that we need revenue, market cap and the concentration
# measures in long format: one row per firm and quarter.
#
# Only the identifier columns and the columns of the measure at hand are kept
# before reshaping. pivot_longer() strips the column prefix (names_prefix) and
# converts the remaining quarter number to numeric (names_transform), so the
# same exactly-cased prefix is used for selecting and for parsing. The CR4
# columns are spelled "Subind" (lower-case i) in df_wide, whereas the HHI
# columns are spelled "SubInd".

# Firm revenue
dflong1 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("Rev_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "Rev_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "Rev"
  )

# Revenue-based HHI of the firm's subindustry
dflong2 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("HHIRev_SubInd_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "HHIRev_SubInd_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "HHIRev"
  )

# Revenue-based CR4 of the firm's subindustry
dflong3 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("CR4Rev_Subind_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "CR4Rev_Subind_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "CR4Rev"
  )

# Firm market cap
dflong4 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("MC_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "MC_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "MC"
  )

# Market-cap-based HHI of the firm's subindustry
dflong5 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("HHIMC_SubInd_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "HHIMC_SubInd_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "HHIMC"
  )

# Market-cap-based CR4 of the firm's subindustry
dflong6 <- df_wide %>%
  select(Name, GICS_SubInd, starts_with("CR4MC_Subind_Q")) %>%
  pivot_longer(
    cols = -c(Name, GICS_SubInd),
    names_to = "Quarter",
    names_prefix = "CR4MC_Subind_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "CR4MC"
  )

# Joining the long data sets on firm, subindustry and quarter
firm_join_keys <- c("Name", "GICS_SubInd", "Quarter")

dfgrowthrates <- dflong1 %>%
  left_join(dflong2, by = firm_join_keys) %>%
  left_join(dflong3, by = firm_join_keys) %>%
  left_join(dflong4, by = firm_join_keys) %>%
  left_join(dflong5, by = firm_join_keys) %>%
  left_join(dflong6, by = firm_join_keys)

# Calculating the firm-level growth rates of Rev and MC:
# (value - previous quarter's value) / previous quarter's value.
# Again, dplyr's lag() is masked and has to be called as dplyr::lag().
# The grouping is dropped right after the calculation so that the base
# subsetting and pdata.frame() below work on a plain, ungrouped data frame.

dfgrowthrates <- dfgrowthrates %>%
  arrange(Name, Quarter) %>%
  group_by(Name) %>%
  mutate(
    GrowthR_MC = (MC - dplyr::lag(MC)) / dplyr::lag(MC),
    GrowthR_Rev = (Rev - dplyr::lag(Rev)) / dplyr::lag(Rev)
  ) %>%
  ungroup()

# Now we add the treatment: 1 from quarter 9 onwards, 0 before.
dfgrowthrates$treatment <- as.numeric(dfgrowthrates$Quarter >= 9)

# Get rid of the infinity results (division by a previous value of zero).
# A row is removed when either growth rate is +/-Inf; NA and NaN rows stay.
finite_growth <- !is.infinite(dfgrowthrates$GrowthR_Rev) &
  !is.infinite(dfgrowthrates$GrowthR_MC)
dfgrowthrates <- dfgrowthrates[finite_growth, ]

### Now we can do our panel regression model

# Conversion to panel data, indexed by firm name and quarter
pgrowthrates <- pdata.frame(dfgrowthrates, index = c("Name", "Quarter"))

# Rev
pgrate1 <- plm(
  GrowthR_Rev ~ HHIRev + treatment,
  data = pgrowthrates,
  model = "within"
)
summary(pgrate1)
Oneway (individual) effect Within Model

Call:
plm(formula = GrowthR_Rev ~ HHIRev + treatment, data = pgrowthrates, 
    model = "within")

Unbalanced Panel: n = 1012, T = 1-15, N = 12916

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-51.586686  -0.312363  -0.036619   0.200419 702.275126 

Coefficients:
          Estimate Std. Error t-value Pr(>|t|)
HHIRev    -3.85066    3.65914 -1.0523   0.2927
treatment -0.05418    0.14312 -0.3786   0.7050

Total Sum of Squares:    717670
Residual Sum of Squares: 717600
R-Squared:      0.00010556
Adj. R-Squared: -0.084997
F-statistic: 0.628283 on 2 and 11902 DF, p-value: 0.53352
# Revenue growth on the revenue-based CR4 plus the treatment dummy
pgrate2 <- plm(
  GrowthR_Rev ~ CR4Rev + treatment,
  data = pgrowthrates,
  model = "within"
)

# MC: market cap growth on each market-cap concentration measure
pgrate3 <- plm(
  GrowthR_MC ~ HHIMC + treatment,
  data = pgrowthrates,
  model = "within"
)
pgrate4 <- plm(
  GrowthR_MC ~ CR4MC + treatment,
  data = pgrowthrates,
  model = "within"
)

# All four firm-level models in one table
stargazer(
  pgrate1, pgrate2, pgrate3, pgrate4,
  title = "Panel Regreesion Firm level Growthrates",
  type = "text"
)

Panel Regreesion Firm level Growthrates
============================================================================================================
                                                   Dependent variable:                                      
             -----------------------------------------------------------------------------------------------
                             GrowthR_Rev                                     GrowthR_MC                     
                      (1)                   (2)                     (3)                       (4)           
------------------------------------------------------------------------------------------------------------
HHIRev              -3.851                                                                                  
                    (3.659)                                                                                 
                                                                                                            
CR4Rev                                    -0.625                                                            
                                          (2.281)                                                           
                                                                                                            
HHIMC                                                             -0.024                                    
                                                                  (0.189)                                   
                                                                                                            
CR4MC                                                                                       0.315**         
                                                                                            (0.138)         
                                                                                                            
treatment           -0.054                -0.054                 -0.100***                 -0.099***        
                    (0.143)               (0.143)                 (0.012)                   (0.012)         
                                                                                                            
------------------------------------------------------------------------------------------------------------
Observations        12,916                12,916                  13,027                    13,027          
R2                  0.0001                0.00002                  0.005                     0.006          
Adjusted R2         -0.085                -0.085                  -0.084                    -0.083          
F Statistic  0.628 (df = 2; 11902) 0.112 (df = 2; 11902) 32.712*** (df = 2; 11956) 35.340*** (df = 2; 11956)
============================================================================================================
Note:                                                                            *p<0.1; **p<0.05; ***p<0.01

Zooming in on the BAT Markets:

Again we take a close look at the impact of the regulatory regime in the primary markets of the BATs. At the firm level, the panel regression shows a negative correlation between market concentration and growth rates as well as a negative treatment effect that is significant only in the two HHI models. However, all firm-level models offer very low explanatory value. In contrast, we find strong models with robust R-squared values at the market level. They show a substantial and significant negative treatment effect in both MC models (the growth rate decreases by around 0.23).

# Panel regression, restricted to the two BAT subindustries

# Firm level
bat_subindustries <- c("50203010", "25502020")
pBATgf <- subset(pgrowthrates, GICS_SubInd %in% bat_subindustries)

# NOTE(review): GICS_SubInd is part of every formula below, but the resulting
# table reports no coefficient for it -- presumably it does not vary within a
# firm and is therefore absorbed by the "within" transformation. Confirm.

# Rev
pbatgrowf1 <- plm(
  GrowthR_Rev ~ GICS_SubInd + HHIRev + treatment,
  data = pBATgf,
  model = "within"
)
pbatgrowf2 <- plm(
  GrowthR_Rev ~ GICS_SubInd + CR4Rev + treatment,
  data = pBATgf,
  model = "within"
)

# MC
pbatgrowf3 <- plm(
  GrowthR_MC ~ GICS_SubInd + HHIMC + treatment,
  data = pBATgf,
  model = "within"
)
pbatgrowf4 <- plm(
  GrowthR_MC ~ GICS_SubInd + CR4MC + treatment,
  data = pBATgf,
  model = "within"
)

stargazer(
  pbatgrowf1, pbatgrowf2, pbatgrowf3, pbatgrowf4,
  title = "Panel Regreesion Firm level Growthrates in BATscontrolling for Subindustry",
  type = "text"
)

Panel Regreesion Firm level Growthrates in BATscontrolling for Subindustry
==================================================================================================
                                              Dependent variable:                                 
             -------------------------------------------------------------------------------------
                             GrowthR_Rev                                GrowthR_MC                
                      (1)                   (2)                   (3)                  (4)        
--------------------------------------------------------------------------------------------------
HHIRev              -0.428                                                                        
                    (0.481)                                                                       
                                                                                                  
CR4Rev                                    -4.924*                                                 
                                          (2.659)                                                 
                                                                                                  
HHIMC                                                          -0.873**                           
                                                                (0.354)                           
                                                                                                  
CR4MC                                                                                 5.954       
                                                                                     (3.731)      
                                                                                                  
treatment          -0.111**               -0.065               -0.117**              -0.045       
                    (0.044)               (0.045)               (0.055)              (0.046)      
                                                                                                  
--------------------------------------------------------------------------------------------------
Observations          648                   648                   888                  888        
R2                   0.011                 0.015                 0.009                0.004       
Adjusted R2         -0.087                -0.082                -0.082               -0.086       
F Statistic  3.200** (df = 2; 589) 4.531** (df = 2; 589) 3.492** (df = 2; 813) 1.713 (df = 2; 813)
==================================================================================================
Note:                                                                  *p<0.1; **p<0.05; ***p<0.01
#negative treatment effect, significant only in the HHI models; R-squared is somewhat higher than in the all-firm models but still very low, and adjusted R-squared is negative

### Market level, restricted to the two BAT subindustries
bat_subindustries <- c("50203010", "25502020")
pBATgmMC <- subset(pgrowthrateMC, GICS_SubInd %in% bat_subindustries)
pBATgmRev <- subset(pgrowthrateRev, GICS_SubInd %in% bat_subindustries)

# Market cap growth on the two MC concentration measures
pbatgrowm1 <- plm(
  Growth_MC ~ HHIMC + treatment,
  data = pBATgmMC,
  model = "within"
)
pbatgrowm2 <- plm(
  Growth_MC ~ CR4MC + treatment,
  data = pBATgmMC,
  model = "within"
)

# Revenue growth on the two revenue concentration measures
pbatgrowm3 <- plm(
  Growth_Rev ~ HHIRev + treatment,
  data = pBATgmRev,
  model = "within"
)
pbatgrowm4 <- plm(
  Growth_Rev ~ CR4Rev + treatment,
  data = pBATgmRev,
  model = "within"
)

stargazer(
  pbatgrowm1, pbatgrowm2, pbatgrowm3, pbatgrowm4,
  title = "Panel Regression Growthrates BAT Markets",
  type = "text"
)

Panel Regression Growthrates BAT Markets
============================================================
                                 Dependent variable:        
                         -----------------------------------
                              Growth_MC        Growth_Rev   
                            (1)       (2)      (3)     (4)  
------------------------------------------------------------
HHIMC                     -0.193                            
                          (0.558)                           
                                                            
CR4MC                              10.504**                 
                                    (4.992)                 
                                                            
HHIRev                                       -0.132         
                                             (0.511)        
                                                            
CR4Rev                                               -1.921 
                                                     (2.891)
                                                            
treatment                -0.236*** -0.227*** -0.027  -0.010 
                          (0.080)   (0.066)  (0.047) (0.049)
                                                            
------------------------------------------------------------
Observations                30        30       30      30   
R2                         0.275     0.378    0.013   0.027 
Adjusted R2                0.191     0.306   -0.101  -0.085 
F Statistic (df = 2; 26)  4.930**  7.888***   0.173   0.362 
============================================================
Note:                            *p<0.1; **p<0.05; ***p<0.01
#both MC models show negative treatment effects while revenue models hold no explanatory power

The following chunk directly compares the MC growth models (market level) for all markets (models 1,2) and only BAT markets (models 3,4). We find a substantially larger negative treatment effect and a noticeably higher R-squared value for the BAT market models. This indicates not only correlation between the regulatory regime and reduced growth rates but a causal relationship between the new competition regime and reduced growth rates in BAT markets.

### Now we compare the market-level MC models for (a) all markets
### (grate1, grate2) and (b) the BAT markets (pbatgrowm1, pbatgrowm2).

stargazer(
  grate1, grate2, pbatgrowm1, pbatgrowm2,
  title = "Panel Regression MC Growth rates Market Level",
  type = "text"
)

Panel Regression MC Growth rates Market Level
=======================================================================================================
                                                Dependent variable:                                    
             ------------------------------------------------------------------------------------------
                                                     Growth_MC                                         
                       (1)                     (2)                   (3)                   (4)         
-------------------------------------------------------------------------------------------------------
HHIMC                 0.149                                         -0.193                             
                     (0.248)                                       (0.558)                             
                                                                                                       
CR4MC                                        0.617**                                    10.504**       
                                             (0.240)                                     (4.992)       
                                                                                                       
treatment           -0.144***               -0.135***             -0.236***             -0.227***      
                     (0.025)                 (0.024)               (0.080)               (0.066)       
                                                                                                       
-------------------------------------------------------------------------------------------------------
Observations           240                     240                    30                   30          
R2                    0.154                   0.177                 0.275                 0.378        
Adjusted R2           0.089                   0.114                 0.191                 0.306        
F Statistic  20.208*** (df = 2; 222) 23.881*** (df = 2; 222) 4.930** (df = 2; 26) 7.888*** (df = 2; 26)
=======================================================================================================
Note:                                                                       *p<0.1; **p<0.05; ***p<0.01

Testing Hypothesis 4

A key indicator of competitiveness is the number of firms entering or leaving a market. Being limited to only publicly listed Chinese firms, we use the change in the number of listed firms as a (less than ideal) proxy for the openness of digital markets. The chunk below includes a graphical representation of the change in the number of firms per GICS subindustry.

### Firms entering/leaving the market are approximated by the change in the
### number of listed firms, i.e. we are working with the NAs in market cap here.

### Overview of the NAs for market cap

# The 16 quarterly market-cap columns are selected by name (MC_Q1 ... MC_Q16)
# rather than by column position, so the selection does not silently change
# when columns are added to or reordered in df_wide.
df_na <- df_wide %>% select(num_range("MC_Q", 1:16))

# Count the number of NAs by variable, i.e. per quarter
na_counts <- colSums(is.na(df_na))
na_counts
 MC_Q1  MC_Q2  MC_Q3  MC_Q4  MC_Q5  MC_Q6  MC_Q7  MC_Q8  MC_Q9 MC_Q10 MC_Q11 
   445    434    423    396    365    347    321    264    239    206    177 
MC_Q12 MC_Q13 MC_Q14 MC_Q15 MC_Q16 
   158    131    105     79     32 
# OK, let's get the actual data frame with all relevant variables: firm name,
# subindustry, quarterly market cap and the subindustry-level market-cap
# concentration measures (HHI and CR4).
# Columns are selected by name instead of by position.
# NOTE(review): this assumes that positions 2:19 and 84:115 of df_wide held
# exactly these columns -- confirm against names(df_wide).

df_NA <- df_wide %>%
  select(
    Name, GICS_SubInd,
    num_range("MC_Q", 1:16),
    num_range("HHIMC_SubInd_Q", 1:16),
    num_range("CR4MC_Subind_Q", 1:16)
  )

# Number of firms with a missing market cap per subindustry and quarter.
# .groups = "drop" returns an ungrouped result.
df_NA_long1 <- df_NA %>%
  select(GICS_SubInd, num_range("MC_Q", 1:16)) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "MC_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "MC"
  ) %>%
  group_by(GICS_SubInd, Quarter) %>%
  summarize(Missing_MC = sum(is.na(MC)), .groups = "drop")

# One row per subindustry (the first one found) carries the concentration
# measures; it is shared by the HHI and the CR4 reshape below.
df_NA_subind <- df_NA[!duplicated(df_NA$GICS_SubInd), ]

# Creating the data frame for the market-cap HHI
df_NA_long2 <- df_NA_subind %>%
  select(GICS_SubInd, num_range("HHIMC_SubInd_Q", 1:16)) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "HHIMC_SubInd_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "HHIMC"
  )

# Getting the market-cap CR4. The CR4 columns are spelled "Subind" (lower-case
# i), so that exact prefix is used for both selecting and parsing.
df_NA_long3 <- df_NA_subind %>%
  select(GICS_SubInd, num_range("CR4MC_Subind_Q", 1:16)) %>%
  pivot_longer(
    cols = -GICS_SubInd,
    names_to = "Quarter",
    names_prefix = "CR4MC_Subind_Q",
    names_transform = list(Quarter = as.numeric),
    values_to = "CR4MC"
  )

### Merging the data sets on quarter and subindustry
na_merge_keys <- c("Quarter", "GICS_SubInd")
df_NA_long <- merge(df_NA_long1, df_NA_long2, by = na_merge_keys, all = TRUE)
df_NA_long <- merge(df_NA_long, df_NA_long3, by = na_merge_keys, all = TRUE)

# Calculate the change in missing values by subindustry and quarter. Rows are
# sorted by quarter first so that lag() refers to the previous quarter. A drop
# in the number of missing values is counted as new firms, hence the sign flip.
df_NA_long <- df_NA_long %>%
  arrange(Quarter) %>%
  group_by(GICS_SubInd) %>%
  mutate(NewFirms = -1 * (Missing_MC - dplyr::lag(Missing_MC))) %>%
  ungroup()

# Plot the per-quarter change in listed firms for every subindustry with
# ggplot2; the dotted vertical line marks quarter 8.
ggplot(
  df_NA_long,
  aes(x = Quarter, y = NewFirms, group = GICS_SubInd, colour = GICS_SubInd)
) +
  geom_line() +
  geom_vline(xintercept = 8, linetype = "dotted") +
  labs(
    title = "Firms entering the Market",
    x = "Quarter",
    y = "New Firms",
    colour = "GICS Subindustry"
  ) +
  theme_minimal()

# No real patterns detectable, but there are some drop-offs around the cut-off.

Due to the unique listing and delisting process in China it is not possible to assess the treatment effect of the new regulatory regime. In this preliminary analysis we simply test for a correlation between our market concentration measures (HHI MC, CR4 MC) and the openness of digital markets, using naive linear regression models with and without a linear time trend (Quarter). Across all four models, we find a robust negative correlation.

# Naive OLS: the number of newly listed firms regressed on each market-cap
# concentration measure separately.
reg20a <- lm(formula = NewFirms ~ HHIMC, data = df_NA_long)
reg20b <- lm(formula = NewFirms ~ CR4MC, data = df_NA_long)

stargazer(
  reg20a, reg20b,
  title = "Naive Regression Market Concentration New Firms",
  type = "text"
)

Naive Regression Market Concentration New Firms
===========================================================
                                   Dependent variable:     
                               ----------------------------
                                         NewFirms          
                                    (1)            (2)     
-----------------------------------------------------------
HHIMC                            -2.717***                 
                                  (0.617)                  
                                                           
CR4MC                                           -3.645***  
                                                 (0.552)   
                                                           
Constant                          2.307***      3.880***   
                                  (0.198)        (0.355)   
                                                           
-----------------------------------------------------------
Observations                        240            240     
R2                                 0.075          0.155    
Adjusted R2                        0.071          0.151    
Residual Std. Error (df = 238)     2.258          2.159    
F Statistic (df = 1; 238)        19.366***      43.602***  
===========================================================
Note:                           *p<0.1; **p<0.05; ***p<0.01
#the higher the HHI and market concentration, the lower the number of new (listed) firms entering the market
#-->highly significant effect
#-->adj r-squared at around 7 percent

# Now adding a linear time trend: Quarter enters as a single numeric regressor
# (one slope), not as a set of per-quarter dummies.

reg20c <- lm(formula = NewFirms ~ HHIMC + Quarter, data = df_NA_long)
# summary(reg20c)

reg20d <- lm(formula = NewFirms ~ CR4MC + Quarter, data = df_NA_long)
# summary(reg20d)

# Naive models (1, 2) next to the models with the Quarter trend (3, 4)
stargazer(
  reg20a, reg20b, reg20c, reg20d,
  title = "Relationship Market Concentration and New Firms",
  type = "text"
)

Relationship Market Concentration and New Firms
===================================================================================================================
                                                          Dependent variable:                                      
                    -----------------------------------------------------------------------------------------------
                                                               NewFirms                                            
                              (1)                     (2)                     (3)                     (4)          
-------------------------------------------------------------------------------------------------------------------
HHIMC                      -2.717***                                       -2.637***                               
                            (0.617)                                         (0.617)                                
                                                                                                                   
CR4MC                                              -3.645***                                       -3.578***       
                                                    (0.552)                                         (0.552)        
                                                                                                                   
Quarter                                                                     0.057*                   0.053         
                                                                            (0.034)                 (0.032)        
                                                                                                                   
Constant                   2.307***                3.880***                1.773***                3.367***        
                            (0.198)                 (0.355)                 (0.370)                 (0.474)        
                                                                                                                   
-------------------------------------------------------------------------------------------------------------------
Observations                  240                     240                     240                     240          
R2                           0.075                   0.155                   0.086                   0.164         
Adjusted R2                  0.071                   0.151                   0.079                   0.157         
Residual Std. Error    2.258 (df = 238)        2.159 (df = 238)        2.249 (df = 237)        2.151 (df = 237)    
F Statistic         19.366*** (df = 1; 238) 43.602*** (df = 1; 238) 11.214*** (df = 2; 237) 23.285*** (df = 2; 237)
===================================================================================================================
Note:                                                                                   *p<0.1; **p<0.05; ***p<0.01

We try to corroborate our results with a panel regression. The models also show a negative relationship between market concentration and the number of new firms. However, the estimated coefficients are not statistically significant.

# Treatment dummy: 1 from quarter 9 onwards, 0 before.
df_NA_long[["treatment"]] <- ifelse(df_NA_long[["Quarter"]] >= 9, 1, 0)

# Declare the panel structure (unit = GICS_SubInd, time = Quarter) and drop
# every row that contains a missing value (the NA values sit in quarter 1).
pNA <- na.omit(
  pdata.frame(df_NA_long, index = c("GICS_SubInd", "Quarter"))
)

# Within estimator of NewFirms on HHIMC; print the full summary.
pNA1 <- plm(formula = NewFirms ~ HHIMC, data = pNA, model = "within")
summary(pNA1)
Oneway (individual) effect Within Model

Call:
plm(formula = NewFirms ~ HHIMC, data = pNA, model = "within")

Balanced Panel: n = 16, T = 15, N = 240

Residuals:
    Min.  1st Qu.   Median  3rd Qu.     Max. 
-4.81146 -0.86579 -0.19963  0.78651  8.18426 

Coefficients:
      Estimate Std. Error t-value Pr(>|t|)
HHIMC -0.42734    2.32843 -0.1835   0.8545

Total Sum of Squares:    705.6
Residual Sum of Squares: 705.49
R-Squared:      0.00015103
Adj. R-Squared: -0.071587
F-statistic: 0.0336841 on 1 and 223 DF, p-value: 0.85455
# negative but statistically insignificant coefficient on HHIMC (p = 0.85); this model contains no treatment variable

# Same within specification, with CR4MC as the concentration measure.
pNA2 <- plm(formula = NewFirms ~ CR4MC, data = pNA, model = "within")

# Text table comparing both panel models.
stargazer(
  pNA1, pNA2,
  title = "Panel Regression Concentration Number of new firms",
  type = "text"
)

Panel Regression Concentration Number of new firms
======================================================
                              Dependent variable:     
                          ----------------------------
                                    NewFirms          
                               (1)            (2)     
------------------------------------------------------
HHIMC                         -0.427                  
                             (2.328)                  
                                                      
CR4MC                                       -3.244    
                                            (2.319)   
                                                      
------------------------------------------------------
Observations                   240            240     
R2                            0.0002         0.009    
Adjusted R2                   -0.072        -0.062    
F Statistic (df = 1; 223)     0.034          1.958    
======================================================
Note:                      *p<0.1; **p<0.05; ***p<0.01